Setup notebook
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(phyloseq)
library(microbiome)
##
## microbiome R package (microbiome.github.com)
##
##
##
## Copyright (C) 2011-2022 Leo Lahti,
## Sudarshan Shetty et al. <microbiome.github.io>
##
##
## Attaching package: 'microbiome'
##
## The following object is masked from 'package:ggplot2':
##
## alpha
##
## The following object is masked from 'package:base':
##
## transform
Functions
#input = dataframe of features x labels (including sample names)
# List of foods in order of model importance
plot_food_direction = function(food_ranking, data, variable) {
for(food in food_ranking) {
cols = colnames(data)
if(food %in% cols){
plot = data %>%
select(variable, food) %>%
ggplot()+
geom_boxplot(aes_string(x= variable, y= food, colour = variable))+
geom_jitter(aes_string(x= variable, y= food, colour = variable), height=0)+
theme_classic()
print (plot)
}
}
}
Load data
aha_pred = read_csv("/Users/aa370/Library/CloudStorage/Box-Box/project_davidlab/LAD_LAB_Personnel/Ammara_A/Projects/Machine_learning/LAD_lab_ML/code/aha_to_choice_predictions.csv")
## Rows: 3420 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): sample, pred, id, treatment, true_week, timepoint
## dbl (4): Case, Control, auc, iteration
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
aha_importance = read_csv("/Users/aa370/Library/CloudStorage/Box-Box/project_davidlab/LAD_LAB_Personnel/Ammara_A/Projects/Machine_learning/LAD_lab_ML/code/aha_to_choice_feature_importance.csv")
## Rows: 20 Columns: 191
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): label
## dbl (190): ATCCTATTATTTTATTATTTTACGAAACTAAACAAAGGTTCAGCAAGCGAGAATAATAAAAAAAG...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pomms_pred = read_csv("/Users/aa370/Library/CloudStorage/Box-Box/project_davidlab/LAD_LAB_Personnel/Ammara_A/Projects/Machine_learning/LAD_lab_ML/code/pomms_to_choice_predictions.csv")
## Rows: 3420 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): sample, pred, id, treatment, true_week, timepoint
## dbl (4): Case, Control, auc, iteration
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pomms_importance = read_csv("/Users/aa370/Library/CloudStorage/Box-Box/project_davidlab/LAD_LAB_Personnel/Ammara_A/Projects/Machine_learning/LAD_lab_ML/code/pomms_to_choice_feature_importance.csv")
## Rows: 20 Columns: 226
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): label
## dbl (225): ATCACGTTTTCCGAAAACAAACAAAGGTTCAGAAAGCGAAAATAAAAAAG, ATCCTTATTTTGA...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
AUC range
aha_pred %>%
select(auc) %>%
distinct() %>%
ggplot()+
geom_boxplot(aes(y= auc, x= "AHA"))+
geom_point(aes(y= auc, x= "AHA"), alpha = 0.5)+
labs(title = "AHA AUC range")+
theme_classic()

pomms_pred %>%
select(auc) %>%
distinct() %>%
ggplot()+
geom_boxplot(aes(y= auc, x= "POMMS"))+
geom_point(aes(y= auc, x= "POMMS"), alpha = 0.5)+
labs(title = "POMMS AUC range")+
theme_classic()

Best AUC
AHA
max = aha_pred$auc %>%
max()
aha_pred %>%
filter(auc == max) %>%
ggplot()+
geom_boxplot(aes(x= treatment, y= Case, colour = timepoint))+
geom_point(aes(x= treatment, y= Case, colour = timepoint), position=position_jitterdodge(), alpha = 0.2)+
labs(title ="AHA to CHOICE")+
theme_classic()

aha_pred %>%
filter(auc == max) %>%
ggplot()+
geom_boxplot(aes(x= treatment, y= Control, colour = timepoint))+
geom_point(aes(x= treatment, y= Control, colour = timepoint), position=position_jitterdodge(), alpha = 0.2)+
labs(title ="AHA to CHOICE")+
theme_classic()

Distribution
aha_pred %>%
filter(auc == max) %>%
ggplot()+
geom_density(aes(x= Control, color= treatment))+
facet_wrap(~timepoint)+
theme_classic()

Pre
x= aha_pred %>%
filter(auc == max) %>%
filter(timepoint == "Pre" & treatment == "Intervention") %>%
pull(Control)
y= aha_pred %>%
filter(auc == max) %>%
filter(timepoint == "Pre" & treatment == "Control") %>%
pull(Control)
t.test(x, y)
##
## Welch Two Sample t-test
##
## data: x and y
## t = 2.1144, df = 93.424, p-value = 0.03714
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.004675737 0.148883704
## sample estimates:
## mean of x mean of y
## 0.6038182 0.5270385
wilcox.test(x, y)
##
## Wilcoxon rank sum test with continuity correction
##
## data: x and y
## W = 1409.5, p-value = 0.05132
## alternative hypothesis: true location shift is not equal to 0
Treatment
x= aha_pred %>%
filter(auc == max) %>%
filter(timepoint == "Treatment" & treatment == "Intervention") %>%
pull(Control)
y= aha_pred %>%
filter(auc == max) %>%
filter(timepoint == "Treatment" & treatment == "Control") %>%
pull(Control)
t.test(x, y)
##
## Welch Two Sample t-test
##
## data: x and y
## t = 2.7936, df = 192.26, p-value = 0.005741
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.01923667 0.11164626
## sample estimates:
## mean of x mean of y
## 0.5970204 0.5315789
wilcox.test(x, y)
##
## Wilcoxon rank sum test with continuity correction
##
## data: x and y
## W = 6847, p-value = 0.004645
## alternative hypothesis: true location shift is not equal to 0
Followup
x= aha_pred %>%
filter(auc == max) %>%
filter(timepoint == "Followup" & treatment == "Intervention") %>%
pull(Control)
y= aha_pred %>%
filter(auc == max) %>%
filter(timepoint == "Followup" & treatment == "Control") %>%
pull(Control)
t.test(x, y)
##
## Welch Two Sample t-test
##
## data: x and y
## t = 0.47849, df = 10.292, p-value = 0.6423
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1577945 0.2445252
## sample estimates:
## mean of x mean of y
## 0.5337500 0.4903846
wilcox.test(x, y)
## Warning in wilcox.test.default(x, y): cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: x and y
## W = 113, p-value = 0.73
## alternative hypothesis: true location shift is not equal to 0
Intervention
Pre vs Treatment
aha_summary = aha_pred %>%
filter(auc == max) %>%
filter(treatment == "Intervention") %>%
group_by(id, timepoint) %>%
summarise(mean_control = mean(Control),
mean_case = mean(Case)) %>%
pivot_wider(names_from = "timepoint", values_from = c("mean_control", "mean_case"))
## `summarise()` has grouped output by 'id'. You can override using the `.groups`
## argument.
x= aha_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Pre)
y= aha_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Treatment)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = 0.20292, df = 13, p-value = 0.8423
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.08750838 0.10565123
## sample estimates:
## mean of the differences
## 0.009071429
wilcox.test(x, y, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: x and y
## V = 58, p-value = 0.7609
## alternative hypothesis: true location shift is not equal to 0
Pre vs Followup
x= aha_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Pre)
y= aha_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Followup)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = 0.19893, df = 3, p-value = 0.855
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2999486 0.3399486
## sample estimates:
## mean of the differences
## 0.02
wilcox.test(x, y, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: x and y
## V = 5, p-value = 1
## alternative hypothesis: true location shift is not equal to 0
Control
Pre vs Treatment
aha_summary = aha_pred %>%
filter(auc == max) %>%
filter(treatment == "Control") %>%
group_by(id, timepoint) %>%
summarise(mean_control = mean(Control),
mean_case = mean(Case)) %>%
pivot_wider(names_from = "timepoint", values_from = c("mean_control", "mean_case"))
## `summarise()` has grouped output by 'id'. You can override using the `.groups`
## argument.
x= aha_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Pre)
y= aha_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Treatment)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = -0.010936, df = 15, p-value = 0.9914
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.09386748 0.09290915
## sample estimates:
## mean of the differences
## -0.0004791667
wilcox.test(x, y, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: x and y
## V = 73, p-value = 0.8209
## alternative hypothesis: true location shift is not equal to 0
Pre vs Followup
x= aha_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Pre)
y= aha_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Followup)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = 0.6429, df = 11, p-value = 0.5335
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.08563164 0.15629830
## sample estimates:
## mean of the differences
## 0.03533333
wilcox.test(x, y, paired = TRUE)
## Warning in wilcox.test.default(x, y, paired = TRUE): cannot compute exact
## p-value with ties
##
## Wilcoxon signed rank test with continuity correction
##
## data: x and y
## V = 45.5, p-value = 0.6377
## alternative hypothesis: true location shift is not equal to 0
POMMS
max = pomms_pred$auc %>%
max()
pomms_pred %>%
filter(auc == max) %>%
ggplot()+
geom_boxplot(aes(x= treatment, y= Case, colour = timepoint))+
geom_point(aes(x= treatment, y= Case, colour = timepoint), position=position_jitterdodge(), alpha = 0.2)+
labs(title ="POMMS to CHOICE")+
theme_classic()

pomms_pred %>%
filter(auc == max) %>%
ggplot()+
geom_boxplot(aes(x= treatment, y= Control, colour = timepoint))+
geom_point(aes(x= treatment, y= Control, colour = timepoint), position=position_jitterdodge(), alpha = 0.2)+
labs(title ="POMMS to CHOICE")+
theme_classic()

Distribution
pomms_pred %>%
filter(auc == max) %>%
ggplot()+
geom_density(aes(x= Control, color = treatment))+
facet_wrap(~ timepoint)+
theme_classic()

Pre
x= pomms_pred %>%
filter(auc == max) %>%
filter(timepoint == "Pre" & treatment == "Intervention") %>%
pull(Control)
y= pomms_pred %>%
filter(auc == max) %>%
filter(timepoint == "Pre" & treatment == "Control") %>%
pull(Control)
t.test(x, y)
##
## Welch Two Sample t-test
##
## data: x and y
## t = -1.6717, df = 40.872, p-value = 0.1022
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.15161052 0.01429584
## sample estimates:
## mean of x mean of y
## 0.1977273 0.2663846
wilcox.test(x, y)
## Warning in wilcox.test.default(x, y): cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: x and y
## W = 223.5, p-value = 0.1994
## alternative hypothesis: true location shift is not equal to 0
Treatment
x= pomms_pred %>%
filter(auc == max) %>%
filter(timepoint == "Treatment" & treatment == "Intervention") %>%
pull(Control)
y= pomms_pred %>%
filter(auc == max) %>%
filter(timepoint == "Treatment" & treatment == "Control") %>%
pull(Control)
t.test(x, y)
##
## Welch Two Sample t-test
##
## data: x and y
## t = 0.56771, df = 98.804, p-value = 0.5715
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04020794 0.07243565
## sample estimates:
## mean of x mean of y
## 0.2410612 0.2249474
wilcox.test(x, y)
##
## Wilcoxon rank sum test with continuity correction
##
## data: x and y
## W = 1468, p-value = 0.6528
## alternative hypothesis: true location shift is not equal to 0
Followup
x= pomms_pred %>%
filter(auc == max) %>%
filter(timepoint == "Followup" & treatment == "Intervention") %>%
pull(Control)
y= pomms_pred %>%
filter(auc == max) %>%
filter(timepoint == "Followup" & treatment == "Control") %>%
pull(Control)
t.test(x, y)
##
## Welch Two Sample t-test
##
## data: x and y
## t = 0.42625, df = 4.8114, p-value = 0.6883
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2121460 0.2952999
## sample estimates:
## mean of x mean of y
## 0.3145000 0.2729231
wilcox.test(x, y)
## Warning in wilcox.test.default(x, y): cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: x and y
## W = 30, p-value = 0.6912
## alternative hypothesis: true location shift is not equal to 0
Intervention
Pre vs Treatment
pomms_summary = pomms_pred %>%
filter(auc == max) %>%
filter(treatment == "Intervention") %>%
group_by(id, timepoint) %>%
summarise(mean_control = mean(Control),
mean_case = mean(Case)) %>%
pivot_wider(names_from = "timepoint", values_from = c("mean_control", "mean_case"))
## `summarise()` has grouped output by 'id'. You can override using the `.groups`
## argument.
x= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Pre)
y= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Treatment)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = -1.1386, df = 13, p-value = 0.2754
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.12759031 0.03951889
## sample estimates:
## mean of the differences
## -0.04403571
wilcox.test(x, y, paired = TRUE)
## Warning in wilcox.test.default(x, y, paired = TRUE): cannot compute exact
## p-value with ties
##
## Wilcoxon signed rank test with continuity correction
##
## data: x and y
## V = 34.5, p-value = 0.2718
## alternative hypothesis: true location shift is not equal to 0
Pre vs Followup
x= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Pre)
y= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Followup)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = -0.85578, df = 3, p-value = 0.455
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3833991 0.2208991
## sample estimates:
## mean of the differences
## -0.08125
wilcox.test(x, y, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: x and y
## V = 4, p-value = 0.875
## alternative hypothesis: true location shift is not equal to 0
Control
Pre vs Treatment
pomms_summary = pomms_pred %>%
filter(auc == max) %>%
filter(treatment == "Control") %>%
group_by(id, timepoint) %>%
summarise(mean_control = mean(Control),
mean_case = mean(Case)) %>%
pivot_wider(names_from = "timepoint", values_from = c("mean_control", "mean_case"))
## `summarise()` has grouped output by 'id'. You can override using the `.groups`
## argument.
x= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Pre)
y= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Treatment)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = 0.98651, df = 15, p-value = 0.3395
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.06131838 0.16698505
## sample estimates:
## mean of the differences
## 0.05283333
wilcox.test(x, y, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: x and y
## V = 81, p-value = 0.5282
## alternative hypothesis: true location shift is not equal to 0
Pre vs Followup
x= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Pre)
y= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Followup)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = 0.45961, df = 11, p-value = 0.6547
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1499719 0.2291386
## sample estimates:
## mean of the differences
## 0.03958333
wilcox.test(x, y, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: x and y
## V = 41, p-value = 0.9097
## alternative hypothesis: true location shift is not equal to 0
AUCs above 0.7
AHA
aha_pred %>%
filter(auc > 0.7) %>%
ggplot()+
geom_boxplot(aes(x= treatment, y= Case, colour = timepoint))+
geom_point(aes(x= treatment, y= Case, colour = timepoint), position=position_jitterdodge(), alpha = 0.2)+
labs(title ="AHA to CHOICE")+
theme_classic()

aha_pred %>%
filter(auc > 0.7) %>%
ggplot()+
geom_boxplot(aes(x= treatment, y= Control, colour = timepoint))+
geom_point(aes(x= treatment, y= Control, colour = timepoint), position=position_jitterdodge(), alpha = 0.2)+
labs(title ="AHA to CHOICE")+
theme_classic()

Distribution
aha_pred %>%
filter(auc > 0.7) %>%
ggplot()+
geom_density(aes(x= Control, color= treatment))+
facet_wrap(~timepoint)+
theme_classic()

Pre
x= aha_pred %>%
filter(auc > 0.7) %>%
filter(timepoint == "Pre" & treatment == "Intervention") %>%
pull(Control)
y= aha_pred %>%
filter(auc > 0.7) %>%
filter(timepoint == "Pre" & treatment == "Control") %>%
pull(Control)
t.test(x, y)
##
## Welch Two Sample t-test
##
## data: x and y
## t = 4.5063, df = 382, p-value = 8.782e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.0485088 0.1236066
## sample estimates:
## mean of x mean of y
## 0.5962500 0.5101923
wilcox.test(x, y)
##
## Wilcoxon rank sum test with continuity correction
##
## data: x and y
## W = 22610, p-value = 7.088e-05
## alternative hypothesis: true location shift is not equal to 0
Treatment
x= aha_pred %>%
filter(auc > 0.7) %>%
filter(timepoint == "Treatment" & treatment == "Intervention") %>%
pull(Control)
y= aha_pred %>%
filter(auc > 0.7) %>%
filter(timepoint == "Treatment" & treatment == "Control") %>%
pull(Control)
t.test(x, y)
##
## Welch Two Sample t-test
##
## data: x and y
## t = 4.334, df = 780.45, p-value = 1.656e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.02925595 0.07770055
## sample estimates:
## mean of x mean of y
## 0.5691888 0.5157105
wilcox.test(x, y)
##
## Wilcoxon rank sum test with continuity correction
##
## data: x and y
## W = 105144, p-value = 9.264e-06
## alternative hypothesis: true location shift is not equal to 0
Followup
x= aha_pred %>%
filter(auc > 0.7) %>%
filter(timepoint == "Followup" & treatment == "Intervention") %>%
pull(Control)
y= aha_pred %>%
filter(auc > 0.7) %>%
filter(timepoint == "Followup" & treatment == "Control") %>%
pull(Control)
t.test(x, y)
##
## Welch Two Sample t-test
##
## data: x and y
## t = 0.26787, df = 49.501, p-value = 0.7899
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.07659395 0.10016126
## sample estimates:
## mean of x mean of y
## 0.4861875 0.4744038
wilcox.test(x, y)
##
## Wilcoxon rank sum test with continuity correction
##
## data: x and y
## W = 1714.5, p-value = 0.7975
## alternative hypothesis: true location shift is not equal to 0
Intervention
Pre vs Treatment
aha_summary = aha_pred %>%
filter(auc > 0.7) %>%
filter(treatment == "Intervention") %>%
group_by(id, timepoint) %>%
summarise(mean_control = mean(Control),
mean_case = mean(Case)) %>%
pivot_wider(names_from = "timepoint", values_from = c("mean_control", "mean_case"))
## `summarise()` has grouped output by 'id'. You can override using the `.groups`
## argument.
x= aha_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Pre)
y= aha_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Treatment)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = 0.6955, df = 13, p-value = 0.499
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.06647149 0.12959054
## sample estimates:
## mean of the differences
## 0.03155952
wilcox.test(x, y, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: x and y
## V = 62, p-value = 0.583
## alternative hypothesis: true location shift is not equal to 0
Pre vs Followup
x= aha_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Pre)
y= aha_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Followup)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = 0.78609, df = 3, p-value = 0.4892
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2365409 0.3917284
## sample estimates:
## mean of the differences
## 0.07759375
wilcox.test(x, y, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: x and y
## V = 6, p-value = 0.875
## alternative hypothesis: true location shift is not equal to 0
Control
Pre vs Treatment
aha_summary = aha_pred %>%
filter(auc > 0.7) %>%
filter(treatment == "Control") %>%
group_by(id, timepoint) %>%
summarise(mean_control = mean(Control),
mean_case = mean(Case)) %>%
pivot_wider(names_from = "timepoint", values_from = c("mean_control", "mean_case"))
## `summarise()` has grouped output by 'id'. You can override using the `.groups`
## argument.
x= aha_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Pre)
y= aha_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Treatment)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = -0.02082, df = 15, p-value = 0.9837
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1048561 0.1028274
## sample estimates:
## mean of the differences
## -0.001014323
wilcox.test(x, y, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: x and y
## V = 80, p-value = 0.5619
## alternative hypothesis: true location shift is not equal to 0
Pre vs Followup
x= aha_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Pre)
y= aha_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Followup)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = 0.45607, df = 11, p-value = 0.6572
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.09943616 0.15141533
## sample estimates:
## mean of the differences
## 0.02598958
wilcox.test(x, y, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: x and y
## V = 41, p-value = 0.9097
## alternative hypothesis: true location shift is not equal to 0
POMMS
pomms_pred %>%
filter(auc > 0.7) %>%
ggplot()+
geom_boxplot(aes(x= treatment, y= Case, colour = timepoint))+
geom_point(aes(x= treatment, y= Case, colour = timepoint), position=position_jitterdodge(), alpha = 0.2)+
labs(title ="POMMS to CHOICE")+
theme_classic()

pomms_pred %>%
filter(auc > 0.7) %>%
ggplot()+
geom_boxplot(aes(x= treatment, y= Control, colour = timepoint))+
geom_point(aes(x= treatment, y= Control, colour = timepoint), position=position_jitterdodge(), alpha = 0.2)+
labs(title ="POMMS to CHOICE")+
theme_classic()

Distribution
pomms_pred %>%
filter(auc > 0.7) %>%
ggplot()+
geom_density(aes(x= Control, color = treatment))+
facet_wrap(~ timepoint)+
theme_classic()

Pre
x= pomms_pred %>%
filter(auc > 0.7) %>%
filter(timepoint == "Pre" & treatment == "Intervention") %>%
pull(Control)
y= pomms_pred %>%
filter(auc > 0.7) %>%
filter(timepoint == "Pre" & treatment == "Control") %>%
pull(Control)
t.test(x, y)
##
## Welch Two Sample t-test
##
## data: x and y
## t = -2.9254, df = 168.01, p-value = 0.003916
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.10330446 -0.02005568
## sample estimates:
## mean of x mean of y
## 0.2059545 0.2676346
wilcox.test(x, y)
##
## Wilcoxon rank sum test with continuity correction
##
## data: x and y
## W = 3791, p-value = 0.04087
## alternative hypothesis: true location shift is not equal to 0
Treatment
x= pomms_pred %>%
filter(auc > 0.7) %>%
filter(timepoint == "Treatment" & treatment == "Intervention") %>%
pull(Control)
y= pomms_pred %>%
filter(auc > 0.7) %>%
filter(timepoint == "Treatment" & treatment == "Control") %>%
pull(Control)
t.test(x, y)
##
## Welch Two Sample t-test
##
## data: x and y
## t = 1.3917, df = 413.85, p-value = 0.1648
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.007239887 0.042342644
## sample estimates:
## mean of x mean of y
## 0.2424286 0.2248772
wilcox.test(x, y)
##
## Wilcoxon rank sum test with continuity correction
##
## data: x and y
## W = 24264, p-value = 0.127
## alternative hypothesis: true location shift is not equal to 0
Followup
x= pomms_pred %>%
filter(auc > 0.7) %>%
filter(timepoint == "Followup" & treatment == "Intervention") %>%
pull(Control)
y= pomms_pred %>%
filter(auc > 0.7) %>%
filter(timepoint == "Followup" & treatment == "Control") %>%
pull(Control)
t.test(x, y)
##
## Welch Two Sample t-test
##
## data: x and y
## t = -0.34954, df = 24.465, p-value = 0.7297
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.09837287 0.06985364
## sample estimates:
## mean of x mean of y
## 0.2451250 0.2593846
wilcox.test(x, y)
##
## Wilcoxon rank sum test with continuity correction
##
## data: x and y
## W = 390, p-value = 0.7123
## alternative hypothesis: true location shift is not equal to 0
Intervention
Pre vs Treatment
pomms_summary = pomms_pred %>%
filter(auc > 0.7) %>%
filter(treatment == "Intervention") %>%
group_by(id, timepoint) %>%
summarise(mean_control = mean(Control),
mean_case = mean(Case)) %>%
pivot_wider(names_from = "timepoint", values_from = c("mean_control", "mean_case"))
## `summarise()` has grouped output by 'id'. You can override using the `.groups`
## argument.
x= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Pre)
y= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Treatment)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = -1.3032, df = 13, p-value = 0.2151
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.09111587 0.02255040
## sample estimates:
## mean of the differences
## -0.03428274
wilcox.test(x, y, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: x and y
## V = 31, p-value = 0.1937
## alternative hypothesis: true location shift is not equal to 0
Pre vs Followup
x= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Pre)
y= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Followup)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = -0.015393, df = 3, p-value = 0.9887
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.220733 0.218608
## sample estimates:
## mean of the differences
## -0.0010625
wilcox.test(x, y, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: x and y
## V = 5, p-value = 1
## alternative hypothesis: true location shift is not equal to 0
Control
Pre vs Treatment
pomms_summary = pomms_pred %>%
filter(auc > 0.7) %>%
filter(treatment == "Control") %>%
group_by(id, timepoint) %>%
summarise(mean_control = mean(Control),
mean_case = mean(Case)) %>%
pivot_wider(names_from = "timepoint", values_from = c("mean_control", "mean_case"))
## `summarise()` has grouped output by 'id'. You can override using the `.groups`
## argument.
x= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Pre)
y= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Treatment) %>%
na.omit() %>%
pull(mean_control_Treatment)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = 1.1884, df = 15, p-value = 0.2532
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04392559 0.15462871
## sample estimates:
## mean of the differences
## 0.05535156
wilcox.test(x, y, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: x and y
## V = 88, p-value = 0.3225
## alternative hypothesis: true location shift is not equal to 0
Pre vs Followup
x= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Pre)
y= pomms_summary %>%
select(id, mean_control_Pre, mean_control_Followup) %>%
na.omit() %>%
pull(mean_control_Followup)
t.test(x, y, paired = TRUE)
##
## Paired t-test
##
## data: x and y
## t = 0.54705, df = 11, p-value = 0.5953
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1315796 0.2186213
## sample estimates:
## mean of the differences
## 0.04352083
wilcox.test(x, y, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: x and y
## V = 41, p-value = 0.9097
## alternative hypothesis: true location shift is not equal to 0
ASV lookup table
# combined = readRDS("/Users/aa370/Library/CloudStorage/Box-Box/project_davidlab/LAD_LAB_Personnel/Ammara_A/Projects/CHOICE/All_cohorts/data_objects/CHOICE_POMMS_AHA_combined_20221213.rds")
#
# lookup = combined %>%
# tax_table() %>%
# as.data.frame() %>%
# as_tibble(rownames = NA) %>%
# rownames_to_column("asv")
common_names = read_csv("/Users/aa370/Library/CloudStorage/Box-Box/project_davidlab/LAD_LAB_Personnel/Brianna P/diet/food-dbs/data/outputs/dada2-compatible/trnL/trnLGH ASV common names.csv")
## Rows: 405 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): asv, name, taxon, common_name
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
CHOICE
c_ps = readRDS("/Users/aa370/Library/CloudStorage/Box-Box/project_davidlab/LAD_LAB_Personnel/Ammara_A/Projects/CHOICE/CHOICE_Trnl/CHOICE_20220912/ps_objects/choice_complete_20221205_clean.rds")
otu_clr = abundances(c_ps, "clr") %>% # clr transform
t()
c_ps = phyloseq(otu_table(otu_clr, taxa_are_rows = FALSE),
sample_data(sample_data(c_ps)),
tax_table(tax_table(c_ps)))
samdf = c_ps %>%
sample_data() %>%
as.data.frame() %>%
as_tibble(rownames = NA) %>%
rownames_to_column("sample") %>%
select(sample, id, treatment, true_week, timepoint)
otudf = c_ps %>%
otu_table() %>%
as.data.frame() %>%
as_tibble(rownames = NA) %>%
rownames_to_column("sample")
input = samdf %>%
left_join(otudf)
## Joining, by = "sample"
Best AUC importances
max = aha_pred$auc %>%
max()
#colnames(aha_importance)
importance_summary = aha_importance %>%
filter(auc == max) %>%
pivot_longer(cols = c("ATCCTATTATTTTATTATTTTACGAAACTAAACAAAGGTTCAGCAAGCGAGAATAATAAAAAAAG":"CATAAACTGGTAGCGACCGGCACCACCGGTAAACTGATCGCAGAAGCTACCGGCT"), names_to = "food", values_to = "importance") %>%
group_by(label, food) %>%
summarise(mean_importance = mean(importance)) %>%
arrange(label, desc(mean_importance))
## `summarise()` has grouped output by 'label'. You can override using the
## `.groups` argument.
ranking = importance_summary %>%
pull(food)
top10 = ranking[1:10]
plot_food_direction(top10, input, "treatment")
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(variable)
##
## # Now:
## data %>% select(all_of(variable))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(food)
##
## # Now:
## data %>% select(all_of(food))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation ideoms with `aes()`










name_list = c()
for(item in top10){
name = common_names %>%
filter(str_detect(asv, item)) %>%
pull(common_name)
name_list = append(name_list, name)
}
name_list
## [1] "cocoa bean"
## [2] "avocado, NA, NA, NA, NA, NA"
## [3] "NA, ceylon cinnamon, bay leaf, avocado, NA, NA"
## [4] "goji berry, NA, goji berry, cutleaf groundcherry, tomatillo, nightshade, NA, blackberry nightshade, potato, NA"
## [5] "garlic"
## [6] "muscadine grape, common grape vine, NA, NA"
## [7] "common grape vine"
## [8] "common grape vine"
## [9] "black chokeberry, apple, NA, crab apple, european pear, NA, afghan pear, nashi pear, siberian pear, hybrid chinese white pear"
## [10] "NA, pecan"
## [11] "sesame"
max = pomms_pred$auc %>%
max()
#colnames(pomms_importance)
importance_summary = pomms_importance %>%
filter(auc == max) %>%
pivot_longer(cols = c("ATCACGTTTTCCGAAAACAAACAAAGGTTCAGAAAGCGAAAATAAAAAAG":"ATCCTGTTTTCTCAAAACAAACAAAGGTTCAGAAAAAAAG"), names_to = "food", values_to = "importance") %>%
group_by(label, food) %>%
summarise(mean_importance = mean(importance)) %>%
arrange(label, desc(mean_importance))
## `summarise()` has grouped output by 'label'. You can override using the
## `.groups` argument.
ranking = importance_summary %>%
pull(food)
top10 = ranking[1:10]
plot_food_direction(top10, input, "treatment")








name_list = c()
for(item in top10){
name = common_names %>%
filter(str_detect(asv, item)) %>%
pull(common_name)
name_list = append(name_list, name)
}
name_list
## [1] "dandelion"
## [2] "sunflower, jerusalem artichoke, sunchoke, dandelion"
## [3] "ashanti pepper, long pepper, black pepper, wild betel"
## [4] "canola, rapeseed"
## [5] "brown mustard, canola, rapeseed, black mustard, cabbage, broccoli, cauliflower, kale, Brussels sprouts, collard greens, savoy, kohlrabi, gai lan, mustards, bok choy, canola, rapeseed, field mustard, napa cabbage, turnip, rocket, NA, white mustard"
## [6] "oregano, NA, thyme"
## [7] "sage"
## [8] "sage"
## [9] "NA, NA, rosemary"
## [10] "lemon balm, rosemary"
## [11] "sesame"
## [12] NA
## [13] NA
## [14] "NA, NA"
## [15] NA
## [16] "rye, NA, NA, NA, NA, NA, bread wheat, NA, NA, NA, emmer wheat, spelt, einkorn, wild einkorn, spelt, NA, domesticated hulled wheat, NA, rivet wheat, durum wheat, wild einkorn, NA"
## [17] "coconut"
## [18] "fennel, dill, angelica, cumin, wild carrot, parsnip, parsley"
## [19] "lettuce"
## [20] "flaxseed"
## [21] "tomato, NA"
AUC above 0.7 importances
#colnames(aha_importance)
importance_summary = aha_importance %>%
filter(auc >0.7) %>%
pivot_longer(cols = c("ATCCTATTATTTTATTATTTTACGAAACTAAACAAAGGTTCAGCAAGCGAGAATAATAAAAAAAG":"CATAAACTGGTAGCGACCGGCACCACCGGTAAACTGATCGCAGAAGCTACCGGCT"), names_to = "food", values_to = "importance") %>%
group_by(label, food) %>%
summarise(mean_importance = mean(importance)) %>%
arrange(label, desc(mean_importance))
## `summarise()` has grouped output by 'label'. You can override using the
## `.groups` argument.
ranking = importance_summary %>%
pull(food)
top10 = ranking[1:10]
plot_food_direction(top10, input, "treatment")










name_list = c()
for(item in top10){
name = common_names %>%
filter(str_detect(asv, item)) %>%
pull(common_name)
name_list = append(name_list, name)
}
name_list
## [1] "cocoa bean"
## [2] "avocado, NA, NA, NA, NA, NA"
## [3] "NA, ceylon cinnamon, bay leaf, avocado, NA, NA"
## [4] "garlic"
## [5] "goji berry, NA, goji berry, cutleaf groundcherry, tomatillo, nightshade, NA, blackberry nightshade, potato, NA"
## [6] "muscadine grape, common grape vine, NA, NA"
## [7] "common grape vine"
## [8] "common grape vine"
## [9] "sesame"
## [10] "NA, pecan"
## [11] "black chokeberry, apple, NA, crab apple, european pear, NA, afghan pear, nashi pear, siberian pear, hybrid chinese white pear"
importance_summary = pomms_importance %>%
filter(auc >0.7) %>%
pivot_longer(cols = c("ATCACGTTTTCCGAAAACAAACAAAGGTTCAGAAAGCGAAAATAAAAAAG":"ATCCTGTTTTCTCAAAACAAACAAAGGTTCAGAAAAAAAG"), names_to = "food", values_to = "importance") %>%
group_by(label, food) %>%
summarise(mean_importance = mean(importance)) %>%
arrange(label, desc(mean_importance))
## `summarise()` has grouped output by 'label'. You can override using the
## `.groups` argument.
ranking = importance_summary %>%
pull(food)
top10 = ranking[1:10]
plot_food_direction(top10, input, "treatment")








name_list = c()
for(item in top10){
name = common_names %>%
filter(str_detect(asv, item)) %>%
pull(common_name)
name_list = append(name_list, name)
}
name_list
## [1] "dandelion"
## [2] "sunflower, jerusalem artichoke, sunchoke, dandelion"
## [3] "tea"
## [4] "ashanti pepper, long pepper, black pepper, wild betel"
## [5] "canola, rapeseed"
## [6] "brown mustard, canola, rapeseed, black mustard, cabbage, broccoli, cauliflower, kale, Brussels sprouts, collard greens, savoy, kohlrabi, gai lan, mustards, bok choy, canola, rapeseed, field mustard, napa cabbage, turnip, rocket, NA, white mustard"
## [7] "rye, NA, NA, NA, NA, NA, bread wheat, NA, NA, NA, emmer wheat, spelt, einkorn, wild einkorn, spelt, NA, domesticated hulled wheat, NA, rivet wheat, durum wheat, wild einkorn, NA"
## [8] "fennel, dill, angelica, cumin, wild carrot, parsnip, parsley"
## [9] "goji berry, NA, goji berry, cutleaf groundcherry, tomatillo, nightshade, NA, blackberry nightshade, potato, NA"
## [10] "coconut"
## [11] "lettuce"
## [12] "oat"